Importing Libraries

library(lubridate)
## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ lubridate::intersect()   masks base::intersect()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ lubridate::setdiff()     masks base::setdiff()
## ✖ lubridate::union()       masks base::union()
library(dplyr)
library(skimr)
library(stringr)
library(treemap)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(viridis)
## Loading required package: viridisLite
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Load every CSV input used by the project from the course directory.

# Match-level data, one file per league (stacked into `matches` below).
epl <- read.csv(file = "~/STA-418-518/epl1.csv")
laliga <- read.csv(file = "~/STA-418-518/laliga1.csv")

# Lookup tables used to translate ids into names, plus the player table.
league <- read.csv(file = "~/STA-418-518/league.csv")
country <- read.csv(file = "~/STA-418-518/country.csv")
players <- read.csv(file = "~/STA-418-518/players.csv")
teamNames <- read.csv(file = "~/STA-418-518/teamnames.csv")

# NOTE(review): presumably manager and referee tables for each league,
# judging by the file names; they are not used in this part of the script.
eplman <- read.csv(file = "~/STA-418-518/EPL MAN.csv")
laligaman <- read.csv(file = "~/STA-418-518/LaLiga Managers.csv")
eplref <- read.csv(file = "~/STA-418-518/EplRef.csv")
laligaref <- read.csv(file = "~/STA-418-518/Laliga Ref.csv")
# Stack the EPL and LaLiga match tables into a single data frame, `matches`.
matches <- rbind(epl, laliga)

# Add two placeholder columns, "Country" and "League", filled with NA.
matches[c("Country", "League")] <- NA
# Translate the numeric country and league ids of every match into readable
# names by looking each id up in the `country` and `league` tables, and store
# the names in matches$Country and matches$League.
#
# match() performs the whole lookup in one vectorised step instead of a
# per-row loop. Unlike the old `for (i in 1:nrow(matches))` loop it is also
# safe when `matches` has zero rows, and it always uses the first row whose id
# matches. An id that is absent from the lookup table yields NA.
#
# The name is taken by column position, as before: column 2 of `country` and
# column 3 of `league`.
# NOTE(review): assumes those positions hold the name columns; confirm
# against the CSV headers.

matches$Country <- as.character(country[[2]])[
  match(matches$country_id, country[["id"]])
]

matches$League <- as.character(league[[3]])[
  match(matches$league_id, league[["id"]])
]
# Add two placeholder columns, "HomeTeam" and "AwayTeam", filled with NA.

matches[c("HomeTeam", "AwayTeam")] <- NA
# Translate the home and away team API ids of every match into team names by
# looking each id up in `teamNames`, and store the names in matches$HomeTeam
# and matches$AwayTeam.
#
# match() performs the whole lookup in one vectorised step instead of a
# per-row loop. Unlike the old `for (i in 1:nrow(matches))` loop it is also
# safe when `matches` has zero rows, and it always uses the first row whose
# team_api_id matches. An id that is absent from `teamNames` yields NA.
#
# The name is taken by column position, as before: column 4 of `teamNames`.
# NOTE(review): assumes column 4 holds the team name; confirm against the
# CSV header.

matches$HomeTeam <- as.character(teamNames[[4]])[
  match(matches$home_team_api_id, teamNames[["team_api_id"]])
]

matches$AwayTeam <- as.character(teamNames[[4]])[
  match(matches$away_team_api_id, teamNames[["team_api_id"]])
]
# Profile every column of `matches` with skimr's skim() to check for missing
# values (n_missing / complete_rate) and to see each column's basic summary.

skim(matches)
Data summary
Name matches
Number of rows 6080
Number of columns 136
_______________________
Column type frequency:
character 14
numeric 122
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
season 0 1 9 9 0 8 0
date 0 1 8 10 0 1095 0
goal 0 1 8 4694 0 5665 0
shoton 0 1 10 8399 0 4675 0
shotoff 0 1 11 8002 0 4674 0
foulcommit 0 1 14 18077 0 4675 0
card 0 1 8 4849 0 5883 0
cross 0 1 9 22748 0 4675 0
corner 0 1 10 7838 0 4674 0
possession 0 1 14 6520 0 4664 0
Country 0 1 5 7 0 2 0
League 0 1 15 22 0 2 0
HomeTeam 0 1 6 25 0 67 0
AwayTeam 0 1 6 25 0 67 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
country_id 0 1.00 11623.50 9895.31 1729.00 1729.00 11623.50 21518.00 21518.00 ▇▁▁▁▇
league_id 0 1.00 11623.50 9895.31 1729.00 1729.00 11623.50 21518.00 21518.00 ▇▁▁▁▇
stage 0 1.00 19.50 10.97 1.00 10.00 19.50 29.00 38.00 ▇▇▇▇▇
match_api_id 0 1.00 1196727.34 493303.98 489042.00 801149.75 1140128.50 1560638.75 2030537.00 ▇▇▃▃▇
home_team_api_id 0 1.00 9087.28 768.54 7869.00 8472.00 8654.00 9869.00 10281.00 ▂▇▁▁▆
away_team_api_id 0 1.00 9087.28 768.54 7869.00 8472.00 8654.00 9869.00 10281.00 ▂▇▁▁▆
home_team_goal 0 1.00 1.59 1.35 0.00 1.00 1.00 2.00 10.00 ▇▂▁▁▁
away_team_goal 0 1.00 1.15 1.15 0.00 0.00 1.00 2.00 8.00 ▇▃▁▁▁
home_player_X1 0 1.00 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
home_player_X2 0 1.00 2.02 0.26 1.00 2.00 2.00 2.00 8.00 ▇▁▁▁▁
home_player_X3 0 1.00 4.02 0.26 1.00 4.00 4.00 4.00 8.00 ▁▁▇▁▁
home_player_X4 0 1.00 6.01 0.31 2.00 6.00 6.00 6.00 8.00 ▁▁▁▇▁
home_player_X5 0 1.00 7.85 0.91 1.00 8.00 8.00 8.00 9.00 ▁▁▁▁▇
home_player_X6 0 1.00 3.19 1.18 1.00 2.00 4.00 4.00 9.00 ▅▇▁▁▁
home_player_X7 0 1.00 4.94 1.09 2.00 4.00 5.00 6.00 8.00 ▁▆▂▇▁
home_player_X8 0 1.00 4.73 1.64 2.00 3.00 5.00 6.00 9.00 ▇▁▇▂▁
home_player_X9 0 1.00 5.93 1.75 1.00 5.00 5.00 8.00 9.00 ▁▂▇▁▆
home_player_X10 0 1.00 5.75 1.49 3.00 4.00 5.00 7.00 9.00 ▆▃▁▇▁
home_player_X11 0 1.00 5.54 0.70 1.00 5.00 5.00 6.00 7.00 ▁▁▁▇▆
away_player_X1 0 1.00 1.00 0.01 1.00 1.00 1.00 1.00 2.00 ▇▁▁▁▁
away_player_X2 0 1.00 2.03 0.28 1.00 2.00 2.00 2.00 8.00 ▇▁▁▁▁
away_player_X3 0 1.00 4.03 0.31 2.00 4.00 4.00 4.00 8.00 ▁▇▁▁▁
away_player_X4 0 1.00 6.01 0.36 1.00 6.00 6.00 6.00 8.00 ▁▁▁▇▁
away_player_X5 0 1.00 7.81 1.03 1.00 8.00 8.00 8.00 9.00 ▁▁▁▁▇
away_player_X6 0 1.00 3.21 1.25 1.00 2.00 4.00 4.00 9.00 ▅▇▁▁▁
away_player_X7 0 1.00 4.91 1.15 2.00 4.00 5.00 6.00 8.00 ▂▆▂▇▁
away_player_X8 0 1.00 4.73 1.64 2.00 3.00 5.00 6.00 9.00 ▇▁▇▂▁
away_player_X9 0 1.00 5.85 1.73 1.00 5.00 5.00 8.00 9.00 ▁▂▇▂▆
away_player_X10 0 1.00 5.91 1.55 1.00 4.00 7.00 7.00 9.00 ▁▅▃▇▂
away_player_X11 1 1.00 5.52 0.71 3.00 5.00 5.00 6.00 7.00 ▁▁▇▃▂
home_player_Y1 0 1.00 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
home_player_Y2 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
home_player_Y3 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
home_player_Y4 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
home_player_Y5 0 1.00 3.07 0.51 3.00 3.00 3.00 3.00 7.00 ▇▁▁▁▁
home_player_Y6 0 1.00 6.45 0.64 3.00 6.00 6.00 7.00 8.00 ▁▁▇▇▁
home_player_Y7 0 1.00 6.56 0.55 5.00 6.00 7.00 7.00 8.00 ▁▇▁▇▁
home_player_Y8 0 1.00 7.44 0.55 3.00 7.00 7.00 8.00 9.00 ▁▁▁▇▇
home_player_Y9 0 1.00 7.84 0.93 6.00 7.00 8.00 8.00 10.00 ▁▇▇▁▂
home_player_Y10 0 1.00 8.89 1.05 6.00 8.00 9.00 10.00 11.00 ▁▇▁▇▁
home_player_Y11 0 1.00 10.58 0.51 1.00 10.00 11.00 11.00 11.00 ▁▁▁▁▇
away_player_Y1 0 1.00 1.00 0.03 1.00 1.00 1.00 1.00 3.00 ▇▁▁▁▁
away_player_Y2 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
away_player_Y3 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
away_player_Y4 0 1.00 3.00 0.00 3.00 3.00 3.00 3.00 3.00 ▁▁▇▁▁
away_player_Y5 0 1.00 3.08 0.57 3.00 3.00 3.00 3.00 7.00 ▇▁▁▁▁
away_player_Y6 0 1.00 6.44 0.67 3.00 6.00 6.00 7.00 8.00 ▁▁▇▇▁
away_player_Y7 0 1.00 6.58 0.56 3.00 6.00 7.00 7.00 8.00 ▁▁▆▇▁
away_player_Y8 0 1.00 7.44 0.55 5.00 7.00 7.00 8.00 9.00 ▁▁▇▇▁
away_player_Y9 0 1.00 7.87 0.96 6.00 7.00 8.00 8.00 10.00 ▁▇▇▁▂
away_player_Y10 0 1.00 8.79 1.08 6.00 8.00 8.00 10.00 11.00 ▂▇▁▇▁
away_player_Y11 1 1.00 10.61 0.49 8.00 10.00 11.00 11.00 11.00 ▁▁▁▅▇
home_player_1 0 1.00 63473.61 69077.06 2984.00 30660.00 34382.00 69650.00 532942.00 ▇▁▁▁▁
home_player_2 0 1.00 91024.26 94497.68 2802.00 31303.00 37754.00 150466.00 690308.00 ▇▂▁▁▁
home_player_3 0 1.00 75413.36 90512.62 2752.00 26209.00 34193.00 77741.00 643570.00 ▇▁▁▁▁
home_player_4 0 1.00 76322.77 85980.81 2752.00 27668.00 37440.00 93458.00 580589.00 ▇▂▁▁▁
home_player_5 0 1.00 88639.59 96614.40 2752.00 31921.00 40006.00 111930.00 693138.00 ▇▂▁▁▁
home_player_6 0 1.00 81478.15 88240.96 2802.00 30893.00 38469.00 109621.00 722766.00 ▇▂▁▁▁
home_player_7 0 1.00 82098.65 89122.79 2802.00 30598.00 38609.00 109621.00 683450.00 ▇▂▁▁▁
home_player_8 0 1.00 87449.87 98323.68 2802.00 30876.00 38807.00 109058.00 683450.00 ▇▂▁▁▁
home_player_9 0 1.00 87782.95 99774.21 2770.00 30892.00 38433.00 114030.50 722766.00 ▇▂▁▁▁
home_player_10 0 1.00 86419.61 100797.74 2802.00 30840.00 38460.00 109491.00 742405.00 ▇▂▁▁▁
home_player_11 0 1.00 75563.14 88766.28 2802.00 30830.00 38044.00 75445.00 696365.00 ▇▂▁▁▁
away_player_1 0 1.00 63648.58 69294.88 2796.00 30657.00 33986.00 69650.00 532942.00 ▇▁▁▁▁
away_player_2 0 1.00 91852.77 95324.49 2790.00 31306.00 38067.00 150480.00 706985.00 ▇▂▁▁▁
away_player_3 0 1.00 73553.91 86346.81 2752.00 26552.00 34193.00 75395.00 643570.00 ▇▁▁▁▁
away_player_4 0 1.00 78355.81 90777.60 2752.00 26777.00 37451.00 97491.00 684723.00 ▇▂▁▁▁
away_player_5 0 1.00 87964.06 95279.45 2790.00 31291.00 40006.00 111865.00 693138.00 ▇▂▁▁▁
away_player_6 0 1.00 82426.76 88614.62 2802.00 30889.00 38746.00 109898.00 722766.00 ▇▂▁▁▁
away_player_7 0 1.00 82675.95 89581.30 2802.00 30655.00 38818.00 111019.50 750435.00 ▇▂▁▁▁
away_player_8 0 1.00 88887.74 101262.21 2802.00 30871.00 38807.00 108568.00 710807.00 ▇▂▁▁▁
away_player_9 0 1.00 88430.10 102085.92 2802.00 30893.00 38433.00 111990.00 722766.00 ▇▂▁▁▁
away_player_10 0 1.00 88475.45 102987.62 2770.00 30853.00 38570.00 110189.00 722766.00 ▇▂▁▁▁
away_player_11 0 1.00 77186.12 89655.95 2802.00 30830.00 38133.50 96509.00 696365.00 ▇▂▁▁▁
B365H 1 1.00 2.73 2.12 1.04 1.67 2.10 2.80 26.00 ▇▁▁▁▁
B365D 1 1.00 4.06 1.46 2.50 3.30 3.50 4.20 17.00 ▇▁▁▁▁
B365A 1 1.00 5.07 4.63 1.08 2.55 3.60 5.50 41.00 ▇▁▁▁▁
BWH 2 1.00 2.65 1.95 1.03 1.65 2.10 2.75 34.00 ▇▁▁▁▁
BWD 2 1.00 3.94 1.34 2.40 3.25 3.40 4.00 19.50 ▇▁▁▁▁
BWA 2 1.00 4.74 4.08 1.10 2.50 3.50 5.25 51.00 ▇▁▁▁▁
IWH 6 1.00 2.56 1.73 1.05 1.65 2.10 2.60 20.00 ▇▁▁▁▁
IWD 6 1.00 3.79 1.01 2.50 3.30 3.40 3.90 11.00 ▇▁▁▁▁
IWA 6 1.00 4.43 3.46 1.10 2.50 3.30 4.90 25.00 ▇▁▁▁▁
LBH 3 1.00 2.64 1.91 1.04 1.66 2.10 2.75 26.00 ▇▁▁▁▁
LBD 3 1.00 3.92 1.33 2.38 3.30 3.40 4.00 19.00 ▇▁▁▁▁
LBA 3 1.00 4.72 4.10 1.10 2.50 3.50 5.00 51.00 ▇▁▁▁▁
PSH 3044 0.50 2.89 2.53 1.04 1.66 2.15 2.99 36.00 ▇▁▁▁▁
PSD 3044 0.50 4.41 2.15 3.04 3.44 3.69 4.44 29.00 ▇▁▁▁▁
PSA 3044 0.50 5.47 5.41 1.09 2.55 3.78 5.82 47.50 ▇▁▁▁▁
WHH 1 1.00 2.70 2.03 1.02 1.67 2.10 2.75 26.00 ▇▁▁▁▁
WHD 1 1.00 3.81 1.23 2.38 3.20 3.30 3.80 17.00 ▇▁▁▁▁
WHA 1 1.00 4.92 4.60 1.08 2.50 3.50 5.50 51.00 ▇▁▁▁▁
SJH 1452 0.76 2.68 2.02 1.04 1.67 2.10 2.70 23.00 ▇▁▁▁▁
SJD 1452 0.76 3.98 1.32 2.70 3.30 3.50 4.00 15.00 ▇▁▁▁▁
SJA 1452 0.76 5.07 4.68 1.13 2.60 3.60 5.50 41.00 ▇▁▁▁▁
VCH 2 1.00 2.79 2.33 1.03 1.67 2.10 2.88 36.00 ▇▁▁▁▁
VCD 2 1.00 4.13 1.69 2.50 3.30 3.50 4.20 26.00 ▇▁▁▁▁
VCA 2 1.00 5.31 5.48 1.08 2.60 3.60 5.50 67.00 ▇▁▁▁▁
GBH 2288 0.62 2.59 1.78 1.05 1.67 2.10 2.60 17.00 ▇▁▁▁▁
GBD 2288 0.62 3.83 1.10 2.75 3.25 3.40 3.80 11.00 ▇▁▁▁▁
GBA 2288 0.62 4.63 3.67 1.12 2.60 3.50 5.00 34.00 ▇▁▁▁▁
BSH 2281 0.62 2.59 1.80 1.04 1.67 2.10 2.62 17.00 ▇▁▁▁▁
BSD 2281 0.62 3.82 1.09 2.75 3.25 3.40 3.80 13.00 ▇▁▁▁▁
BSA 2281 0.62 4.73 3.94 1.14 2.60 3.50 5.00 34.00 ▇▁▁▁▁
on_target_shot_home_team 0 1.00 4.88 3.98 0.00 1.00 5.00 7.00 26.00 ▇▅▁▁▁
on_target_shot_away_team 0 1.00 3.86 3.31 0.00 1.00 4.00 6.00 19.00 ▇▆▂▁▁
off_target_shot_home_team 0 1.00 4.91 3.80 0.00 1.00 5.00 7.00 21.00 ▇▆▂▁▁
off_target_shot_away_team 0 1.00 3.87 3.15 0.00 1.00 4.00 6.00 19.00 ▇▇▂▁▁
foul_home_team 0 1.00 8.98 6.11 0.00 5.00 10.00 13.00 32.00 ▆▇▆▁▁
foul_away_team 0 1.00 9.35 6.27 0.00 5.00 10.00 14.00 37.00 ▆▇▃▁▁
yellow_card_home_team 0 1.00 1.98 1.45 0.00 1.00 2.00 3.00 9.00 ▇▇▂▁▁
yellow_card_away_team 0 1.00 2.33 1.51 0.00 1.00 2.00 3.00 10.00 ▇▅▁▁▁
red_card_home_team 0 1.00 0.05 0.24 0.00 0.00 0.00 0.00 2.00 ▇▁▁▁▁
red_card_away_team 0 1.00 0.07 0.26 0.00 0.00 0.00 0.00 2.00 ▇▁▁▁▁
crosses_home_team 0 1.00 15.35 11.27 0.00 6.00 16.00 23.00 72.00 ▇▇▂▁▁
crosses_away_team 0 1.00 11.87 9.16 0.00 4.00 12.00 18.00 55.00 ▇▆▂▁▁
corner_home_team 0 1.00 4.64 3.72 0.00 1.00 5.00 7.00 20.00 ▇▆▂▁▁
corner_away_team 0 1.00 3.59 3.05 0.00 0.00 3.00 6.00 19.00 ▇▆▂▁▁
possession_home_team 0 1.00 52.98 8.58 4.00 48.00 53.00 59.00 83.00 ▁▁▅▇▁
possession_away_team 0 1.00 47.02 8.58 17.00 41.00 47.00 52.00 96.00 ▁▇▅▁▁
H_Age 0 1.00 27.27 1.38 23.26 26.35 27.28 28.18 32.24 ▁▆▇▂▁
A_Age 0 1.00 27.25 1.35 22.65 26.32 27.25 28.16 32.29 ▁▅▇▃▁
# Summary statistics for every column of the data set: min, quartiles, mean
# and max for numeric columns, length/class/mode for character columns, and
# the NA count for columns that have missing values.
summary(matches)
##    country_id      league_id        season              stage     
##  Min.   : 1729   Min.   : 1729   Length:6080        Min.   : 1.0  
##  1st Qu.: 1729   1st Qu.: 1729   Class :character   1st Qu.:10.0  
##  Median :11624   Median :11624   Mode  :character   Median :19.5  
##  Mean   :11624   Mean   :11624                      Mean   :19.5  
##  3rd Qu.:21518   3rd Qu.:21518                      3rd Qu.:29.0  
##  Max.   :21518   Max.   :21518                      Max.   :38.0  
##                                                                   
##      date            match_api_id     home_team_api_id away_team_api_id
##  Length:6080        Min.   : 489042   Min.   : 7869    Min.   : 7869   
##  Class :character   1st Qu.: 801150   1st Qu.: 8472    1st Qu.: 8472   
##  Mode  :character   Median :1140129   Median : 8654    Median : 8654   
##                     Mean   :1196727   Mean   : 9087    Mean   : 9087   
##                     3rd Qu.:1560639   3rd Qu.: 9869    3rd Qu.: 9869   
##                     Max.   :2030537   Max.   :10281    Max.   :10281   
##                                                                        
##  home_team_goal   away_team_goal  home_player_X1 home_player_X2 
##  Min.   : 0.000   Min.   :0.000   Min.   :1      Min.   :1.000  
##  1st Qu.: 1.000   1st Qu.:0.000   1st Qu.:1      1st Qu.:2.000  
##  Median : 1.000   Median :1.000   Median :1      Median :2.000  
##  Mean   : 1.591   Mean   :1.148   Mean   :1      Mean   :2.023  
##  3rd Qu.: 2.000   3rd Qu.:2.000   3rd Qu.:1      3rd Qu.:2.000  
##  Max.   :10.000   Max.   :8.000   Max.   :1      Max.   :8.000  
##                                                                 
##  home_player_X3  home_player_X4  home_player_X5  home_player_X6 
##  Min.   :1.000   Min.   :2.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:6.000   1st Qu.:8.000   1st Qu.:2.000  
##  Median :4.000   Median :6.000   Median :8.000   Median :4.000  
##  Mean   :4.023   Mean   :6.009   Mean   :7.852   Mean   :3.188  
##  3rd Qu.:4.000   3rd Qu.:6.000   3rd Qu.:8.000   3rd Qu.:4.000  
##  Max.   :8.000   Max.   :8.000   Max.   :9.000   Max.   :9.000  
##                                                                 
##  home_player_X7  home_player_X8  home_player_X9  home_player_X10
##  Min.   :2.000   Min.   :2.000   Min.   :1.000   Min.   :3.00   
##  1st Qu.:4.000   1st Qu.:3.000   1st Qu.:5.000   1st Qu.:4.00   
##  Median :5.000   Median :5.000   Median :5.000   Median :5.00   
##  Mean   :4.944   Mean   :4.733   Mean   :5.932   Mean   :5.75   
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:8.000   3rd Qu.:7.00   
##  Max.   :8.000   Max.   :9.000   Max.   :9.000   Max.   :9.00   
##                                                                 
##  home_player_X11 away_player_X1 away_player_X2  away_player_X3  away_player_X4 
##  Min.   :1.00    Min.   :1      Min.   :1.000   Min.   :2.000   Min.   :1.000  
##  1st Qu.:5.00    1st Qu.:1      1st Qu.:2.000   1st Qu.:4.000   1st Qu.:6.000  
##  Median :5.00    Median :1      Median :2.000   Median :4.000   Median :6.000  
##  Mean   :5.54    Mean   :1      Mean   :2.027   Mean   :4.029   Mean   :6.013  
##  3rd Qu.:6.00    3rd Qu.:1      3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:6.000  
##  Max.   :7.00    Max.   :2      Max.   :8.000   Max.   :8.000   Max.   :8.000  
##                                                                                
##  away_player_X5  away_player_X6 away_player_X7  away_player_X8  away_player_X9 
##  Min.   :1.000   Min.   :1.00   Min.   :2.000   Min.   :2.000   Min.   :1.000  
##  1st Qu.:8.000   1st Qu.:2.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:5.000  
##  Median :8.000   Median :4.00   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :7.808   Mean   :3.21   Mean   :4.906   Mean   :4.727   Mean   :5.849  
##  3rd Qu.:8.000   3rd Qu.:4.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:8.000  
##  Max.   :9.000   Max.   :9.00   Max.   :8.000   Max.   :9.000   Max.   :9.000  
##                                                                                
##  away_player_X10 away_player_X11 home_player_Y1 home_player_Y2 home_player_Y3
##  Min.   :1.000   Min.   :3.00    Min.   :1      Min.   :3      Min.   :3     
##  1st Qu.:4.000   1st Qu.:5.00    1st Qu.:1      1st Qu.:3      1st Qu.:3     
##  Median :7.000   Median :5.00    Median :1      Median :3      Median :3     
##  Mean   :5.907   Mean   :5.52    Mean   :1      Mean   :3      Mean   :3     
##  3rd Qu.:7.000   3rd Qu.:6.00    3rd Qu.:1      3rd Qu.:3      3rd Qu.:3     
##  Max.   :9.000   Max.   :7.00    Max.   :1      Max.   :3      Max.   :3     
##                  NA's   :1                                                   
##  home_player_Y4 home_player_Y5  home_player_Y6  home_player_Y7  home_player_Y8 
##  Min.   :3      Min.   :3.000   Min.   :3.000   Min.   :5.000   Min.   :3.000  
##  1st Qu.:3      1st Qu.:3.000   1st Qu.:6.000   1st Qu.:6.000   1st Qu.:7.000  
##  Median :3      Median :3.000   Median :6.000   Median :7.000   Median :7.000  
##  Mean   :3      Mean   :3.066   Mean   :6.447   Mean   :6.561   Mean   :7.437  
##  3rd Qu.:3      3rd Qu.:3.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:8.000  
##  Max.   :3      Max.   :7.000   Max.   :8.000   Max.   :8.000   Max.   :9.000  
##                                                                                
##  home_player_Y9   home_player_Y10  home_player_Y11 away_player_Y1
##  Min.   : 6.000   Min.   : 6.000   Min.   : 1.00   Min.   :1     
##  1st Qu.: 7.000   1st Qu.: 8.000   1st Qu.:10.00   1st Qu.:1     
##  Median : 8.000   Median : 9.000   Median :11.00   Median :1     
##  Mean   : 7.844   Mean   : 8.891   Mean   :10.58   Mean   :1     
##  3rd Qu.: 8.000   3rd Qu.:10.000   3rd Qu.:11.00   3rd Qu.:1     
##  Max.   :10.000   Max.   :11.000   Max.   :11.00   Max.   :3     
##                                                                  
##  away_player_Y2 away_player_Y3 away_player_Y4 away_player_Y5  away_player_Y6 
##  Min.   :3      Min.   :3      Min.   :3      Min.   :3.000   Min.   :3.000  
##  1st Qu.:3      1st Qu.:3      1st Qu.:3      1st Qu.:3.000   1st Qu.:6.000  
##  Median :3      Median :3      Median :3      Median :3.000   Median :6.000  
##  Mean   :3      Mean   :3      Mean   :3      Mean   :3.084   Mean   :6.435  
##  3rd Qu.:3      3rd Qu.:3      3rd Qu.:3      3rd Qu.:3.000   3rd Qu.:7.000  
##  Max.   :3      Max.   :3      Max.   :3      Max.   :7.000   Max.   :8.000  
##                                                                              
##  away_player_Y7  away_player_Y8  away_player_Y9   away_player_Y10 
##  Min.   :3.000   Min.   :5.000   Min.   : 6.000   Min.   : 6.000  
##  1st Qu.:6.000   1st Qu.:7.000   1st Qu.: 7.000   1st Qu.: 8.000  
##  Median :7.000   Median :7.000   Median : 8.000   Median : 8.000  
##  Mean   :6.577   Mean   :7.437   Mean   : 7.875   Mean   : 8.792  
##  3rd Qu.:7.000   3rd Qu.:8.000   3rd Qu.: 8.000   3rd Qu.:10.000  
##  Max.   :8.000   Max.   :9.000   Max.   :10.000   Max.   :11.000  
##                                                                   
##  away_player_Y11 home_player_1    home_player_2    home_player_3   
##  Min.   : 8.00   Min.   :  2984   Min.   :  2802   Min.   :  2752  
##  1st Qu.:10.00   1st Qu.: 30660   1st Qu.: 31303   1st Qu.: 26209  
##  Median :11.00   Median : 34382   Median : 37754   Median : 34193  
##  Mean   :10.61   Mean   : 63474   Mean   : 91024   Mean   : 75413  
##  3rd Qu.:11.00   3rd Qu.: 69650   3rd Qu.:150466   3rd Qu.: 77741  
##  Max.   :11.00   Max.   :532942   Max.   :690308   Max.   :643570  
##  NA's   :1                                                         
##  home_player_4    home_player_5    home_player_6    home_player_7   
##  Min.   :  2752   Min.   :  2752   Min.   :  2802   Min.   :  2802  
##  1st Qu.: 27668   1st Qu.: 31921   1st Qu.: 30893   1st Qu.: 30598  
##  Median : 37440   Median : 40006   Median : 38469   Median : 38609  
##  Mean   : 76323   Mean   : 88640   Mean   : 81478   Mean   : 82099  
##  3rd Qu.: 93458   3rd Qu.:111930   3rd Qu.:109621   3rd Qu.:109621  
##  Max.   :580589   Max.   :693138   Max.   :722766   Max.   :683450  
##                                                                     
##  home_player_8    home_player_9    home_player_10   home_player_11  
##  Min.   :  2802   Min.   :  2770   Min.   :  2802   Min.   :  2802  
##  1st Qu.: 30876   1st Qu.: 30892   1st Qu.: 30840   1st Qu.: 30830  
##  Median : 38807   Median : 38433   Median : 38460   Median : 38044  
##  Mean   : 87450   Mean   : 87783   Mean   : 86420   Mean   : 75563  
##  3rd Qu.:109058   3rd Qu.:114031   3rd Qu.:109491   3rd Qu.: 75445  
##  Max.   :683450   Max.   :722766   Max.   :742405   Max.   :696365  
##                                                                     
##  away_player_1    away_player_2    away_player_3    away_player_4   
##  Min.   :  2796   Min.   :  2790   Min.   :  2752   Min.   :  2752  
##  1st Qu.: 30657   1st Qu.: 31306   1st Qu.: 26552   1st Qu.: 26777  
##  Median : 33986   Median : 38067   Median : 34193   Median : 37451  
##  Mean   : 63649   Mean   : 91853   Mean   : 73554   Mean   : 78356  
##  3rd Qu.: 69650   3rd Qu.:150480   3rd Qu.: 75395   3rd Qu.: 97491  
##  Max.   :532942   Max.   :706985   Max.   :643570   Max.   :684723  
##                                                                     
##  away_player_5    away_player_6    away_player_7    away_player_8   
##  Min.   :  2790   Min.   :  2802   Min.   :  2802   Min.   :  2802  
##  1st Qu.: 31291   1st Qu.: 30889   1st Qu.: 30655   1st Qu.: 30871  
##  Median : 40006   Median : 38746   Median : 38818   Median : 38807  
##  Mean   : 87964   Mean   : 82427   Mean   : 82676   Mean   : 88888  
##  3rd Qu.:111865   3rd Qu.:109898   3rd Qu.:111020   3rd Qu.:108568  
##  Max.   :693138   Max.   :722766   Max.   :750435   Max.   :710807  
##                                                                     
##  away_player_9    away_player_10   away_player_11       goal          
##  Min.   :  2802   Min.   :  2770   Min.   :  2802   Length:6080       
##  1st Qu.: 30893   1st Qu.: 30853   1st Qu.: 30830   Class :character  
##  Median : 38433   Median : 38570   Median : 38134   Mode  :character  
##  Mean   : 88430   Mean   : 88476   Mean   : 77186                     
##  3rd Qu.:111990   3rd Qu.:110189   3rd Qu.: 96509                     
##  Max.   :722766   Max.   :722766   Max.   :696365                     
##                                                                       
##     shoton            shotoff           foulcommit            card          
##  Length:6080        Length:6080        Length:6080        Length:6080       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     cross              corner           possession            B365H       
##  Length:6080        Length:6080        Length:6080        Min.   : 1.040  
##  Class :character   Class :character   Class :character   1st Qu.: 1.670  
##  Mode  :character   Mode  :character   Mode  :character   Median : 2.100  
##                                                           Mean   : 2.732  
##                                                           3rd Qu.: 2.800  
##                                                           Max.   :26.000  
##                                                           NA's   :1       
##      B365D            B365A             BWH              BWD        
##  Min.   : 2.500   Min.   : 1.080   Min.   : 1.030   Min.   : 2.400  
##  1st Qu.: 3.300   1st Qu.: 2.550   1st Qu.: 1.650   1st Qu.: 3.250  
##  Median : 3.500   Median : 3.600   Median : 2.100   Median : 3.400  
##  Mean   : 4.057   Mean   : 5.068   Mean   : 2.653   Mean   : 3.937  
##  3rd Qu.: 4.200   3rd Qu.: 5.500   3rd Qu.: 2.750   3rd Qu.: 4.000  
##  Max.   :17.000   Max.   :41.000   Max.   :34.000   Max.   :19.500  
##  NA's   :1        NA's   :1        NA's   :2        NA's   :2       
##       BWA              IWH              IWD              IWA       
##  Min.   : 1.100   Min.   : 1.050   Min.   : 2.500   Min.   : 1.10  
##  1st Qu.: 2.500   1st Qu.: 1.650   1st Qu.: 3.300   1st Qu.: 2.50  
##  Median : 3.500   Median : 2.100   Median : 3.400   Median : 3.30  
##  Mean   : 4.738   Mean   : 2.558   Mean   : 3.786   Mean   : 4.43  
##  3rd Qu.: 5.250   3rd Qu.: 2.600   3rd Qu.: 3.900   3rd Qu.: 4.90  
##  Max.   :51.000   Max.   :20.000   Max.   :11.000   Max.   :25.00  
##  NA's   :2        NA's   :6        NA's   :6        NA's   :6      
##       LBH              LBD              LBA              PSH       
##  Min.   : 1.040   Min.   : 2.380   Min.   : 1.100   Min.   : 1.04  
##  1st Qu.: 1.660   1st Qu.: 3.300   1st Qu.: 2.500   1st Qu.: 1.66  
##  Median : 2.100   Median : 3.400   Median : 3.500   Median : 2.15  
##  Mean   : 2.638   Mean   : 3.925   Mean   : 4.718   Mean   : 2.89  
##  3rd Qu.: 2.750   3rd Qu.: 4.000   3rd Qu.: 5.000   3rd Qu.: 2.99  
##  Max.   :26.000   Max.   :19.000   Max.   :51.000   Max.   :36.00  
##  NA's   :3        NA's   :3        NA's   :3        NA's   :3044   
##       PSD              PSA              WHH              WHD        
##  Min.   : 3.040   Min.   : 1.090   Min.   : 1.020   Min.   : 2.380  
##  1st Qu.: 3.440   1st Qu.: 2.550   1st Qu.: 1.670   1st Qu.: 3.200  
##  Median : 3.690   Median : 3.780   Median : 2.100   Median : 3.300  
##  Mean   : 4.409   Mean   : 5.466   Mean   : 2.698   Mean   : 3.806  
##  3rd Qu.: 4.440   3rd Qu.: 5.822   3rd Qu.: 2.750   3rd Qu.: 3.800  
##  Max.   :29.000   Max.   :47.500   Max.   :26.000   Max.   :17.000  
##  NA's   :3044     NA's   :3044     NA's   :1        NA's   :1       
##       WHA              SJH              SJD              SJA        
##  Min.   : 1.080   Min.   : 1.040   Min.   : 2.700   Min.   : 1.130  
##  1st Qu.: 2.500   1st Qu.: 1.670   1st Qu.: 3.300   1st Qu.: 2.600  
##  Median : 3.500   Median : 2.100   Median : 3.500   Median : 3.600  
##  Mean   : 4.924   Mean   : 2.683   Mean   : 3.983   Mean   : 5.071  
##  3rd Qu.: 5.500   3rd Qu.: 2.700   3rd Qu.: 4.000   3rd Qu.: 5.500  
##  Max.   :51.000   Max.   :23.000   Max.   :15.000   Max.   :41.000  
##  NA's   :1        NA's   :1452     NA's   :1452     NA's   :1452    
##       VCH              VCD              VCA              GBH        
##  Min.   : 1.030   Min.   : 2.500   Min.   : 1.080   Min.   : 1.050  
##  1st Qu.: 1.670   1st Qu.: 3.300   1st Qu.: 2.600   1st Qu.: 1.670  
##  Median : 2.100   Median : 3.500   Median : 3.600   Median : 2.100  
##  Mean   : 2.786   Mean   : 4.134   Mean   : 5.307   Mean   : 2.593  
##  3rd Qu.: 2.880   3rd Qu.: 4.200   3rd Qu.: 5.500   3rd Qu.: 2.600  
##  Max.   :36.000   Max.   :26.000   Max.   :67.000   Max.   :17.000  
##  NA's   :2        NA's   :2        NA's   :2        NA's   :2288    
##       GBD              GBA             BSH              BSD        
##  Min.   : 2.750   Min.   : 1.12   Min.   : 1.040   Min.   : 2.750  
##  1st Qu.: 3.250   1st Qu.: 2.60   1st Qu.: 1.670   1st Qu.: 3.250  
##  Median : 3.400   Median : 3.50   Median : 2.100   Median : 3.400  
##  Mean   : 3.825   Mean   : 4.63   Mean   : 2.593   Mean   : 3.822  
##  3rd Qu.: 3.800   3rd Qu.: 5.00   3rd Qu.: 2.620   3rd Qu.: 3.800  
##  Max.   :11.000   Max.   :34.00   Max.   :17.000   Max.   :13.000  
##  NA's   :2288     NA's   :2288    NA's   :2281     NA's   :2281    
##       BSA         on_target_shot_home_team on_target_shot_away_team
##  Min.   : 1.140   Min.   : 0.000           Min.   : 0.000          
##  1st Qu.: 2.600   1st Qu.: 1.000           1st Qu.: 1.000          
##  Median : 3.500   Median : 5.000           Median : 4.000          
##  Mean   : 4.733   Mean   : 4.879           Mean   : 3.862          
##  3rd Qu.: 5.000   3rd Qu.: 7.000           3rd Qu.: 6.000          
##  Max.   :34.000   Max.   :26.000           Max.   :19.000          
##  NA's   :2281                                                      
##  off_target_shot_home_team off_target_shot_away_team foul_home_team  
##  Min.   : 0.000            Min.   : 0.000            Min.   : 0.000  
##  1st Qu.: 1.000            1st Qu.: 1.000            1st Qu.: 5.000  
##  Median : 5.000            Median : 4.000            Median :10.000  
##  Mean   : 4.907            Mean   : 3.865            Mean   : 8.983  
##  3rd Qu.: 7.000            3rd Qu.: 6.000            3rd Qu.:13.000  
##  Max.   :21.000            Max.   :19.000            Max.   :32.000  
##                                                                      
##  foul_away_team   yellow_card_home_team yellow_card_away_team
##  Min.   : 0.000   Min.   :0.000         Min.   : 0.000       
##  1st Qu.: 5.000   1st Qu.:1.000         1st Qu.: 1.000       
##  Median :10.000   Median :2.000         Median : 2.000       
##  Mean   : 9.346   Mean   :1.981         Mean   : 2.331       
##  3rd Qu.:14.000   3rd Qu.:3.000         3rd Qu.: 3.000       
##  Max.   :37.000   Max.   :9.000         Max.   :10.000       
##                                                              
##  red_card_home_team red_card_away_team crosses_home_team crosses_away_team
##  Min.   :0.00000    Min.   :0.00000    Min.   : 0.00     Min.   : 0.00    
##  1st Qu.:0.00000    1st Qu.:0.00000    1st Qu.: 6.00     1st Qu.: 4.00    
##  Median :0.00000    Median :0.00000    Median :16.00     Median :12.00    
##  Mean   :0.05493    Mean   :0.06546    Mean   :15.35     Mean   :11.87    
##  3rd Qu.:0.00000    3rd Qu.:0.00000    3rd Qu.:23.00     3rd Qu.:18.00    
##  Max.   :2.00000    Max.   :2.00000    Max.   :72.00     Max.   :55.00    
##                                                                           
##  corner_home_team corner_away_team possession_home_team possession_away_team
##  Min.   : 0.000   Min.   : 0.000   Min.   : 4.00        Min.   :17.00       
##  1st Qu.: 1.000   1st Qu.: 0.000   1st Qu.:48.00        1st Qu.:41.00       
##  Median : 5.000   Median : 3.000   Median :53.00        Median :47.00       
##  Mean   : 4.641   Mean   : 3.595   Mean   :52.98        Mean   :47.02       
##  3rd Qu.: 7.000   3rd Qu.: 6.000   3rd Qu.:59.00        3rd Qu.:52.00       
##  Max.   :20.000   Max.   :19.000   Max.   :83.00        Max.   :96.00       
##                                                                             
##      H_Age           A_Age         Country             League         
##  Min.   :23.26   Min.   :22.65   Length:6080        Length:6080       
##  1st Qu.:26.35   1st Qu.:26.32   Class :character   Class :character  
##  Median :27.28   Median :27.25   Mode  :character   Mode  :character  
##  Mean   :27.27   Mean   :27.25                                        
##  3rd Qu.:28.18   3rd Qu.:28.16                                        
##  Max.   :32.24   Max.   :32.29                                        
##                                                                       
##    HomeTeam           AwayTeam        
##  Length:6080        Length:6080       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Print a compact overview of `matches`: one line per column showing its name,
# type and first few values.
glimpse(matches)
## Rows: 6,080
## Columns: 136
## $ country_id                <int> 1729, 1729, 1729, 1729, 1729, 1729, 1729, 17…
## $ league_id                 <int> 1729, 1729, 1729, 1729, 1729, 1729, 1729, 17…
## $ season                    <chr> "2008/2009", "2008/2009", "2008/2009", "2008…
## $ stage                     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 10, 10…
## $ date                      <chr> "8/17/2008", "8/16/2008", "8/16/2008", "8/16…
## $ match_api_id              <int> 489042, 489043, 489044, 489045, 489046, 4890…
## $ home_team_api_id          <int> 10260, 9825, 8472, 8654, 10252, 8668, 8549, …
## $ away_team_api_id          <int> 10261, 8659, 8650, 8528, 8456, 8655, 8586, 1…
## $ home_team_goal            <int> 1, 1, 0, 2, 4, 2, 2, 3, 2, 4, 2, 4, 2, 0, 0,…
## $ away_team_goal            <int> 1, 0, 1, 1, 2, 3, 1, 1, 1, 0, 0, 4, 0, 1, 3,…
## $ home_player_X1            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ home_player_X2            <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ home_player_X3            <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ home_player_X4            <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ home_player_X5            <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ home_player_X6            <int> 2, 2, 2, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 3,…
## $ home_player_X7            <int> 4, 4, 4, 4, 4, 3, 4, 3, 4, 5, 4, 4, 4, 4, 5,…
## $ home_player_X8            <int> 6, 6, 6, 6, 6, 5, 6, 5, 6, 7, 6, 6, 6, 6, 7,…
## $ home_player_X9            <int> 8, 8, 8, 8, 8, 7, 8, 7, 8, 4, 8, 8, 8, 8, 5,…
## $ home_player_X10           <int> 4, 4, 4, 4, 4, 9, 4, 9, 4, 6, 4, 4, 4, 4, 4,…
## $ home_player_X11           <int> 6, 6, 6, 6, 6, 5, 6, 5, 6, 5, 6, 6, 6, 6, 6,…
## $ away_player_X1            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ away_player_X2            <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ away_player_X3            <int> 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ away_player_X4            <int> 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ away_player_X5            <int> 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ away_player_X6            <int> 2, 5, 2, 2, 1, 4, 2, 2, 2, 2, 3, 2, 5, 2, 3,…
## $ away_player_X7            <int> 4, 7, 4, 4, 3, 6, 4, 4, 4, 4, 5, 4, 2, 4, 5,…
## $ away_player_X8            <int> 6, 9, 6, 6, 5, 8, 6, 6, 6, 6, 7, 6, 4, 6, 7,…
## $ away_player_X9            <int> 8, 1, 8, 8, 7, 2, 8, 8, 8, 8, 3, 8, 6, 8, 3,…
## $ away_player_X10           <int> 5, 3, 4, 4, 9, 6, 4, 4, 4, 4, 5, 5, 8, 4, 5,…
## $ away_player_X11           <int> 5, 5, 6, 6, 5, 4, 6, 6, 6, 6, 7, 5, 5, 6, 7,…
## $ home_player_Y1            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ home_player_Y2            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y3            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y4            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y5            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ home_player_Y6            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y7            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y8            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 5,…
## $ home_player_Y9            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 8,…
## $ home_player_Y10           <int> 10, 10, 10, 10, 10, 7, 10, 7, 10, 8, 10, 10,…
## $ home_player_Y11           <int> 10, 10, 10, 10, 10, 11, 10, 11, 10, 11, 10, …
## $ away_player_Y1            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ away_player_Y2            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y3            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y4            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y5            <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ away_player_Y6            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7,…
## $ away_player_Y7            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,…
## $ away_player_Y8            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,…
## $ away_player_Y9            <int> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 7, 7, 7, 1…
## $ away_player_Y10           <int> 9, 7, 10, 10, 7, 10, 10, 10, 10, 10, 10, 9, …
## $ away_player_Y11           <int> 11, 11, 10, 10, 11, 10, 10, 10, 10, 10, 10, …
## $ home_player_1             <int> 30726, 23686, 32562, 36374, 30380, 31465, 35…
## $ home_player_2             <int> 30362, 26111, 38836, 30966, 30357, 30371, 24…
## $ home_player_3             <int> 30620, 38835, 24446, 23818, 24658, 24004, 24…
## $ home_player_4             <int> 30865, 30986, 24408, 37277, 43280, 33086, 38…
## $ home_player_5             <int> 32569, 31291, 36786, 30687, 23282, 30857, 24…
## $ home_player_6             <int> 24148, 31013, 38802, 36394, 38609, 24011, 24…
## $ home_player_7             <int> 34944, 30935, 24655, 37169, 24780, 109058, 9…
## $ home_player_8             <int> 30373, 39297, 17866, 24223, 23782, 23268, 24…
## $ home_player_9             <int> 24154, 26181, 30352, 24773, 23354, 24846, 30…
## $ home_player_10            <int> 24157, 30960, 23927, 34543, 23264, 24006, 35…
## $ home_player_11            <int> 30829, 36410, 24410, 23139, 26165, 24160, 42…
## $ away_player_1             <int> 24224, 36373, 30660, 34421, 31432, 30622, 30…
## $ away_player_2             <int> 25518, 36832, 37442, 34987, 46403, 37764, 34…
## $ away_player_3             <int> 24228, 23115, 30617, 35472, 24208, 19020, 38…
## $ away_player_4             <int> 30929, 37280, 24134, 111865, 23939, 23921, 2…
## $ away_player_5             <int> 29581, 24728, 414792, 25005, 33963, 24136, 4…
## $ away_player_6             <int> 38807, 24664, 37139, 35327, 47413, 30342, 30…
## $ away_player_7             <int> 40565, 31088, 30618, 25150, 40198, 23889, 30…
## $ away_player_8             <int> 30360, 23257, 40701, 97988, 42119, 23916, 31…
## $ away_player_9             <int> 33852, 24171, 24800, 41877, 222222, 23922, 2…
## $ away_player_10            <int> 34574, 25922, 24635, 127857, 33633, 34176, 4…
## $ away_player_11            <int> 37799, 27267, 30853, 34466, 107216, 30646, 2…
## $ goal                      <chr> "<goal><value><comment>n</comment><stats><go…
## $ shoton                    <chr> "<shoton><value><stats><blocked>1</blocked><…
## $ shotoff                   <chr> "<shotoff><value><stats><shotoff>1</shotoff>…
## $ foulcommit                <chr> "<foulcommit><value><stats><foulscommitted>1…
## $ card                      <chr> "<card><value><comment>y</comment><stats><yc…
## $ cross                     <chr> "<cross><value><stats><crosses>1</crosses></…
## $ corner                    <chr> "<corner><value><stats><corners>1</corners><…
## $ possession                <chr> "<possession><value><comment>56</comment><ev…
## $ B365H                     <dbl> 1.29, 1.20, 5.50, 1.91, 1.91, 2.00, 3.20, 1.…
## $ B365D                     <dbl> 5.50, 6.50, 3.60, 3.40, 3.40, 3.30, 3.40, 3.…
## $ B365A                     <dbl> 11.00, 15.00, 1.67, 4.20, 4.33, 4.00, 2.25, …
## $ BWH                       <dbl> 1.30, 1.22, 5.00, 1.90, 1.95, 1.85, 2.80, 1.…
## $ BWD                       <dbl> 4.75, 5.50, 3.35, 3.20, 3.20, 3.25, 3.20, 3.…
## $ BWA                       <dbl> 8.25, 10.00, 1.67, 3.80, 3.60, 4.00, 2.30, 4…
## $ IWH                       <dbl> 1.30, 1.20, 4.50, 1.80, 2.00, 2.00, 2.90, 1.…
## $ IWD                       <dbl> 4.4, 5.2, 3.5, 3.3, 3.2, 3.2, 3.2, 3.3, 3.2,…
## $ IWA                       <dbl> 8.50, 11.00, 1.65, 3.80, 3.30, 3.30, 2.20, 4…
## $ LBH                       <dbl> 1.25, 1.20, 4.50, 1.80, 1.83, 1.80, 2.80, 1.…
## $ LBD                       <dbl> 4.50, 5.00, 3.30, 3.20, 3.20, 3.20, 3.20, 3.…
## $ LBA                       <dbl> 10.00, 11.00, 1.67, 4.00, 3.75, 4.00, 2.20, …
## $ PSH                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ PSD                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ PSA                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ WHH                       <dbl> 1.25, 1.17, 5.50, 1.83, 1.91, 1.95, 2.90, 1.…
## $ WHD                       <dbl> 4.50, 5.50, 3.30, 3.20, 3.20, 3.10, 3.20, 3.…
## $ WHA                       <dbl> 10.00, 12.00, 1.57, 3.75, 3.50, 3.50, 2.15, …
## $ SJH                       <dbl> 1.25, 1.20, 4.33, 1.91, 1.91, 2.00, 2.88, 1.…
## $ SJD                       <dbl> 5.00, 5.50, 3.40, 3.25, 3.25, 3.25, 3.40, 3.…
## $ SJA                       <dbl> 10.00, 12.00, 1.73, 3.75, 3.75, 3.40, 2.20, …
## $ VCH                       <dbl> 1.28, 1.25, 5.50, 1.90, 1.90, 2.05, 3.20, 1.…
## $ VCD                       <dbl> 5.50, 6.00, 3.80, 3.50, 3.50, 3.30, 3.40, 3.…
## $ VCA                       <dbl> 12.00, 13.00, 1.65, 4.35, 4.35, 4.00, 2.30, …
## $ GBH                       <dbl> 1.30, 1.22, 5.00, 1.91, 1.91, 2.00, 3.00, 1.…
## $ GBD                       <dbl> 4.75, 5.50, 3.40, 3.25, 3.25, 3.25, 3.25, 3.…
## $ GBA                       <dbl> 10.00, 13.00, 1.70, 4.00, 4.00, 3.75, 2.30, …
## $ BSH                       <dbl> 1.29, 1.22, 4.50, 1.91, 1.91, 2.00, 2.80, 1.…
## $ BSD                       <dbl> 4.50, 5.00, 3.40, 3.25, 3.30, 3.25, 3.25, 3.…
## $ BSA                       <dbl> 11.00, 13.00, 1.73, 3.80, 3.75, 3.50, 2.30, …
## $ on_target_shot_home_team  <int> 11, 12, 4, 5, 5, 2, 7, 5, 5, 6, 11, 9, 5, 13…
## $ on_target_shot_away_team  <int> 1, 2, 11, 7, 9, 8, 2, 1, 4, 7, 3, 3, 11, 6, …
## $ off_target_shot_home_team <int> 10, 13, 3, 7, 4, 7, 5, 7, 5, 10, 8, 11, 2, 6…
## $ off_target_shot_away_team <int> 9, 3, 5, 15, 5, 8, 5, 6, 6, 5, 3, 5, 5, 4, 1…
## $ foul_home_team            <int> 16, 11, 13, 14, 11, 11, 11, 14, 10, 11, 11, …
## $ foul_away_team            <int> 11, 9, 12, 13, 13, 11, 11, 15, 9, 8, 15, 16,…
## $ yellow_card_home_team     <int> 3, 0, 0, 2, 0, 2, 1, 1, 3, 0, 1, 1, 1, 1, 0,…
## $ yellow_card_away_team     <int> 0, 0, 2, 1, 1, 2, 2, 2, 0, 1, 2, 4, 3, 2, 2,…
## $ red_card_home_team        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ red_card_away_team        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ crosses_home_team         <int> 24, 21, 15, 15, 16, 14, 25, 22, 16, 25, 14, …
## $ crosses_away_team         <int> 9, 7, 19, 27, 16, 21, 27, 29, 14, 9, 6, 14, …
## $ corner_home_team          <int> 6, 7, 1, 6, 7, 3, 7, 4, 5, 8, 3, 9, 3, 5, 5,…
## $ corner_away_team          <int> 6, 5, 8, 10, 8, 4, 9, 3, 6, 3, 1, 1, 3, 3, 4…
## $ possession_home_team      <int> 55, 66, 46, 52, 52, 51, 53, 47, 53, 65, 60, …
## $ possession_away_team      <int> 45, 34, 54, 48, 48, 49, 47, 53, 47, 35, 40, …
## $ H_Age                     <dbl> 28.57454, 23.97286, 26.75757, 26.17812, 27.5…
## $ A_Age                     <dbl> 26.40214, 25.60740, 27.40097, 26.64033, 23.7…
## $ Country                   <chr> "England", "England", "England", "England", …
## $ League                    <chr> "England Premier League", "England Premier L…
## $ HomeTeam                  <chr> "Manchester United", "Arsenal", "Sunderland"…
## $ AwayTeam                  <chr> "Newcastle United", "West Bromwich Albion", …
# Trim `matches` down to the columns this project uses. Removed, in order:
#   * the first two columns (country_id and league_id),
#   * match_api_id ... away_team_api_id,
#   * goal ... BSA (the `<goal>`/`<card>`/... markup columns and the numeric
#     B365H ... BSA columns),
#   * home_player_X1 ... away_player_Y11.
matches <- matches[, -(1:2)] %>%
  select(
    -(match_api_id:away_team_api_id),
    -(goal:BSA),
    -(home_player_X1:away_player_Y11)
  )
# Fill missing player ids in the 22 line-up columns (positions 6-27,
# home_player_1 ... away_player_11) with the placeholder id 222222. The
# placeholder is intended to stand for a fake player whose age, height and
# weight are the averages over all players, so those averages can be
# substituted wherever no real player is recorded.
# across() is used in place of the superseded mutate_at().
matches <- matches %>%
  mutate(across(6:27, ~ replace_na(.x, 222222)))
# Turn the text date columns into Date values with lubridate:
#   * matches$date is parsed as month/day/year (e.g. "8/17/2008"),
#   * players$birthday is parsed as year-month-day.
matches$date <- mdy(matches$date)
players$birthday <- ymd(players$birthday)
# Replace each of the 22 player-id columns with the matching player's name.
#
# For one column at a time: rename it to "player_api_id" so it can serve as the
# join key, left-join the id -> name lookup from `players`, give the joined
# `player_name` column the original column name, then drop the id. Because the
# joined column is appended, the player columns end up at the right-hand side
# of `matches` once the loop finishes.
# NOTE(review): assumes `players` has one row per player_api_id; duplicate ids
# would duplicate match rows - confirm.
nam <- names(matches)[6:27] # home_player_1 ... away_player_11
player_lookup <- players[c("player_api_id", "player_name")]
for (player_col in nam) {
  names(matches)[names(matches) == player_col] <- "player_api_id"
  matches <- left_join(matches, player_lookup, by = "player_api_id")
  names(matches)[names(matches) == "player_name"] <- player_col
  matches <- select(matches, -player_api_id)
}

# The remaining steps were written separately against a data frame called
# `matchesF`; alias it here so that code can be used without renaming.
matchesF <- matches
# The manager and referee tables are merged with matchesF on team name, so the
# names have to be spelled the same way everywhere. The EPL referee table uses
# shortened club names; expand them to the full names in both the Home and the
# Away column. Each entry is applied with str_replace(), in the order listed.
epl_full_names <- c(
  "Tottenham"     = "Tottenham Hotspur",
  "Newcastle"     = "Newcastle United",
  "West Ham"      = "West Ham United",
  "Swansea"       = "Swansea City",
  "Wigan"         = "Wigan Athletic",
  "Norwich"       = "Norwich City",
  "Wolverhampton" = "Wolverhampton Wanderers",
  "Birmingham"    = "Birmingham City",
  "Leicester"     = "Leicester City",
  "Cardiff"       = "Cardiff City"
)

for (team_col in c("Home", "Away")) {
  for (j in seq_along(epl_full_names)) {
    eplref[[team_col]] <- str_replace(
      eplref[[team_col]],
      names(epl_full_names)[j],
      epl_full_names[[j]]
    )
  }
}
# Rewrite the club names in the LaLiga referee table to the full names used for
# merging with the matches data, in both the Home and the Away column. Each
# entry is applied with str_replace(), in the order listed (short form on the
# left, replacement on the right).
laliga_ref_full_names <- c(
  "Real Madrid"            = "Real Madrid CF",
  "Barcelona"              = "FC Barcelona",
  "Málaga"                 = "Málaga CF",
  "Valencia"               = "Valencia CF",
  "Atlético de Madrid"     = "Atlético Madrid",
  "Getafe"                 = "Getafe CF",
  "Sevilla"                = "Sevilla FC",
  "Espanyol"               = "RCD Espanyol",
  "Athletic Club"          = "Athletic Club de Bilbao",
  "Villarreal"             = "Villarreal CF",
  "Osasuna"                = "CA Osasuna",
  "Deportivo de La Coruña" = "RC Deportivo de La Coruña",
  "Levante"                = "Levante UD",
  "Granada"                = "Granada CF",
  "Sporting de Gijón"      = "Real Sporting de Gijón",
  "Almería"                = "UD Almería",
  "Mallorca"               = "RCD Mallorca",
  "Betis"                  = "Real Betis Balompié",
  "Zaragoza"               = "Real Zaragoza",
  "Valladolid"             = "Real Valladolid",
  "Racing de Santander"    = "Racing Santander",
  "Celta de Vigo"          = "RC Celta de Vigo",
  "Elche"                  = "Elche CF",
  "Eibar"                  = "SD Eibar",
  "Córdoba"                = "Córdoba CF",
  "Numancia"               = "CD Numancia",
  "Las Palmas"             = "UD Las Palmas",
  "Xerez"                  = "Xerez Club Deportivo",
  "Hércules"               = "Hércules Club de Fútbol",
  "Tenerife"               = "CD Tenerife",
  "Recreativo de Huelva"   = "RC Recreativo"
)

for (team_col in c("Home", "Away")) {
  for (j in seq_along(laliga_ref_full_names)) {
    laligaref[[team_col]] <- str_replace(
      laligaref[[team_col]],
      names(laliga_ref_full_names)[j],
      laliga_ref_full_names[[j]]
    )
  }
}
# Rewrite the club names in the LaLiga manager table to the full names used for
# merging with the matches data. Each entry is applied to laligaman$Team with
# str_replace(), in the order listed (short form on the left, replacement on
# the right).
#
# The earlier copy-pasted version also "replaced" "Atlético Madrid" and
# "Racing Santander" with themselves; those two no-ops are left out here.
laliga_man_full_names <- c(
  "Real Madrid"     = "Real Madrid CF",
  "Barcelona"       = "FC Barcelona",
  "Málaga"          = "Málaga CF",
  "Valencia"        = "Valencia CF",
  "Getafe"          = "Getafe CF",
  "Sevilla"         = "Sevilla FC",
  "Espanyol"        = "RCD Espanyol",
  "Athletic Bilbao" = "Athletic Club de Bilbao",
  "Villarreal"      = "Villarreal CF",
  "Osasuna"         = "CA Osasuna",
  "Levante"         = "Levante UD",
  "Granada"         = "Granada CF",
  "Almería"         = "UD Almería",
  "Mallorca"        = "RCD Mallorca",
  "Zaragoza"        = "Real Zaragoza",
  "Valladolid"      = "Real Valladolid",
  "Elche"           = "Elche CF",
  "Eibar"           = "SD Eibar",
  "Córdoba"         = "Córdoba CF",
  "Numancia"        = "CD Numancia",
  "Las Palmas"      = "UD Las Palmas",
  "Xerez"           = "Xerez Club Deportivo",
  "Hércules"        = "Hércules Club de Fútbol",
  "Tenerife"        = "CD Tenerife",
  "Recreativo"      = "RC Recreativo"
)

for (j in seq_along(laliga_man_full_names)) {
  laligaman$Team <- str_replace(
    laligaman$Team,
    names(laliga_man_full_names)[j],
    laliga_man_full_names[[j]]
  )
}
# Attach the England Premier League (EPL) managers to every match.
# The manager table is joined twice on (season, team): once through HomeTeam
# and once through AwayTeam. After each join the incoming `Manager` column is
# renamed so the two results do not collide.

# Home side
matchesF <- left_join(
  matchesF, eplman,
  by = c("season" = "Season", "HomeTeam" = "Team")
)
names(matchesF)[names(matchesF) == "Manager"] <- "HomeManager"

# Away side
matchesF <- left_join(
  matchesF, eplman,
  by = c("season" = "Season", "AwayTeam" = "Team")
)
names(matchesF)[names(matchesF) == "Manager"] <- "AwayManager"
# Attach the Spain LIGA BBVA (LaLiga) home manager and fold it into the
# existing HomeManager column, so there is one home-manager column for both
# leagues rather than one per league.
#
# HomeManager was filled from the EPL manager table, so it is NA for LaLiga
# fixtures; the `Manager` column produced by this join is NA for EPL fixtures.
# Each NA is turned into "" and the two columns are concatenated, leaving a
# single manager name per row ("" when neither table has a match). The
# temporary `Manager` column is then dropped.
#
# Only these two columns are touched. The earlier
# `matchesF[is.na(matchesF)] <- ""` rewrote every NA in every column, which
# turns any numeric column containing an NA into character.
matchesF <- matchesF %>%
  left_join(
    laligaman,
    by = c("season" = "Season", "HomeTeam" = "Team")
  ) %>%
  mutate(
    HomeManager = str_c(replace_na(HomeManager, ""), replace_na(Manager, ""))
  ) %>%
  select(-Manager)
# Attach the Spain LIGA BBVA (LaLiga) away manager and fold it into the
# existing AwayManager column, so there is one away-manager column for both
# leagues rather than one per league.
#
# AwayManager was filled from the EPL manager table, so it has no value for
# LaLiga fixtures; the `Manager` column produced by this join is NA for EPL
# fixtures. Each NA is turned into "" and the two columns are concatenated,
# leaving a single manager name per row ("" when neither table has a match).
# The temporary `Manager` column is then dropped.
#
# Only these two columns are touched. The earlier
# `matchesF[is.na(matchesF)] <- ""` rewrote every NA in every column, which
# turns any numeric column containing an NA into character.
matchesF <- matchesF %>%
  left_join(
    laligaman,
    by = c("season" = "Season", "AwayTeam" = "Team")
  ) %>%
  mutate(
    AwayManager = str_c(replace_na(AwayManager, ""), replace_na(Manager, ""))
  ) %>%
  select(-Manager)
# Bring the date columns of the three tables to the same type so the matches
# can be merged with the EPL and LaLiga referee tables on date.
# matchesF$date goes through ymd(); both referee tables store their dates
# day-first and go through dmy().
matchesF$date <- ymd(matchesF$date)
eplref$Date <- dmy(eplref$Date)
laligaref$Date <- dmy(laligaref$Date)
# Look up the match official for every EPL fixture: join the EPL referee table
# on (date, home team, away team), then rename the incoming `Referee` column
# to `Ref`.
matchesF <- left_join(
  matchesF, eplref,
  by = c("date" = "Date", "HomeTeam" = "Home", "AwayTeam" = "Away")
)
names(matchesF)[names(matchesF) == "Referee"] <- "Ref"
# Look up the match official for every LaLiga fixture and fold it into the
# existing Ref column.
#
# The LaLiga referee table is joined on (date, home team, away team). Ref holds
# the EPL officials and is NA for LaLiga fixtures; the `Referee` column
# produced by this join is NA for EPL fixtures. Each NA is turned into "" and
# the two columns are concatenated, leaving a single referee name per row
# ("" when neither table has a match). The temporary `Referee` column is then
# dropped.
#
# Only these two columns are touched. The earlier
# `matchesF[is.na(matchesF)] <- ""` rewrote every NA in every column, which
# turns any numeric column containing an NA into character.
matchesF <- matchesF %>%
  left_join(
    laligaref,
    by = c("date" = "Date", "HomeTeam" = "Home", "AwayTeam" = "Away")
  ) %>%
  mutate(Ref = str_c(replace_na(Ref, ""), replace_na(Referee, ""))) %>%
  select(-Referee)
# Sort the matches chronologically (earliest date first).
matchesF <- dplyr::arrange(matchesF, date)
# Add a `FullTimeResult` column recording the outcome of each match, obtained
# by comparing the home and away goals:
#   "Home Team" - home side scored more,
#   "Draw"      - both sides scored the same,
#   "Away Team" - away side scored more.
# The comparison is vectorised over the whole column instead of looping over
# rows; a row with a missing goal count gets NA rather than aborting the
# script, and an empty data frame is handled as well.
matchesF <- matchesF %>%
  mutate(
    FullTimeResult = case_when(
      home_team_goal > away_team_goal ~ "Home Team",
      home_team_goal == away_team_goal ~ "Draw",
      home_team_goal < away_team_goal ~ "Away Team"
    )
  )
# Total number of goals (home + away) scored in each season of each league.
# The result keeps summarise()'s default grouping (by season), as the message
# below reports. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned.
goalsPerSeason <- matchesF %>%
  group_by(season, League) %>%
  summarise(Goals = sum(home_team_goal + away_team_goal, na.rm = TRUE))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
goalsPerSeason
## # A tibble: 16 × 3
## # Groups:   season [8]
##    season    League                 Goals
##    <chr>     <chr>                  <int>
##  1 2008/2009 England Premier League   942
##  2 2008/2009 Spain LIGA BBVA         1101
##  3 2009/2010 England Premier League  1053
##  4 2009/2010 Spain LIGA BBVA         1031
##  5 2010/2011 England Premier League  1063
##  6 2010/2011 Spain LIGA BBVA         1042
##  7 2011/2012 England Premier League  1066
##  8 2011/2012 Spain LIGA BBVA         1050
##  9 2012/2013 England Premier League  1063
## 10 2012/2013 Spain LIGA BBVA         1091
## 11 2013/2014 England Premier League  1052
## 12 2013/2014 Spain LIGA BBVA         1045
## 13 2014/2015 England Premier League   975
## 14 2014/2015 Spain LIGA BBVA         1009
## 15 2015/2016 England Premier League  1026
## 16 2015/2016 Spain LIGA BBVA         1043
# Grouped bar chart of the goal totals: one bar per league within each season,
# drawn side by side. coord_flip() puts the seasons on the vertical axis and
# the legend sits below the plot. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
ggplot(goalsPerSeason, aes(x = season, y = Goals, fill = League)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Number of Goals throughout each Seasons in each League") +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "bottom")

# Total number of red cards (home + away) shown in each season of each league.
# The data is grouped by season and league, not by referee. The result keeps
# summarise()'s default grouping (by season), as the message below reports.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
redPerSeason <- matchesF %>%
  group_by(season, League) %>%
  summarise(Reds = sum(red_card_home_team + red_card_away_team, na.rm = TRUE))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
redPerSeason
## # A tibble: 16 × 3
## # Groups:   season [8]
##    season    League                  Reds
##    <chr>     <chr>                  <int>
##  1 2008/2009 England Premier League    36
##  2 2008/2009 Spain LIGA BBVA           73
##  3 2009/2010 England Premier League    27
##  4 2009/2010 Spain LIGA BBVA           77
##  5 2010/2011 England Premier League    42
##  6 2010/2011 Spain LIGA BBVA           58
##  7 2011/2012 England Premier League    39
##  8 2011/2012 Spain LIGA BBVA           58
##  9 2012/2013 England Premier League    37
## 10 2012/2013 Spain LIGA BBVA           52
## 11 2013/2014 England Premier League    38
## 12 2013/2014 Spain LIGA BBVA           45
## 13 2014/2015 England Premier League    35
## 14 2014/2015 Spain LIGA BBVA           45
## 15 2015/2016 England Premier League    34
## 16 2015/2016 Spain LIGA BBVA           36
# Grouped bar chart of the red-card totals: one bar per league within each
# season, drawn side by side. coord_flip() puts the seasons on the vertical
# axis and the legend sits below the plot. TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
ggplot(redPerSeason, aes(x = season, y = Reds, fill = League)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Number of Red Cards throughout each Seasons",
    x = "Seasons",
    y = "Number of Red Cards"
  ) +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "bottom")

# Function to get two data frames with the season statistics.

seasonalData <- function(a, b){ # a is the season and b is the League name
  
  season <- matchesF[matchesF$season == a & (matchesF$League == b),] # Filtering the matches data set with the passed parameters and storing the resulting table as "season" and creating some more columns that we seek to get. This table will have all the match level information.
  season['HomePoints'] <- 0
  season['AwayPoints'] <- 0
  season['HomePosition'] <- 0
  season['AwayPosition'] <- 0
  season['HomeGoalsFor'] <- 0
  season['AwayGoalsFor'] <- 0
  season['HomeGoalsAgainst'] <- 0
  season['AwayGoalsAgainst'] <- 0
  
  Teams <- c(unique(season$HomeTeam))
  Points <- c(0)
  GF <- c(0)
  GA <- c(0)
  GD <- c(0)
  Rank <- c(0)
  Played <- 38
  df <- data.frame(Rank, Teams, Played, GF, GA, GD, Points) #Creating a new data frame that will hold the high level data, i.e, the overall data of the season for each team like how many points each team got, how many goals each scored.
  
  teamPoints <- c()
  for(i in unique(season$HomeTeam)){
    teamPoints[i] <- 0
  } # creating a dictionary like object to hold the points for each team for the season. Initial value is 0
  
  teamGoalsfor <- c()
  for(i in unique(season$HomeTeam)){
    teamGoalsfor[i] <- 0
  } # creating a dictionary like object to hold the goals each team scored in the season. Initial value is 0
  
  teamGoalsagainst <- c()
  for(i in unique(season$HomeTeam)){
    teamGoalsagainst[i] <- 0
  } # creating a dictionary like object to hold the goals each team conceded in the season. Initial value is 0
  
  for(i in 1:nrow(season)){ # Going through each games to see which teams won. 
    if(season$home_team_goal[i] > season$away_team_goal[i]){
      
      df[df$Teams == season$HomeTeam[i],]$Points = df[df$Teams == season$HomeTeam[i],]$Points+3 # Updating the points on the "df" dataframe. Each win gives 3 points. 
      
      teamPoints[season$HomeTeam[i]] = teamPoints[season$HomeTeam[i]] + 3 # Updating the points to the dictionary which we will later store in the seasons table. this is needes to get match by match points throughout the season to get a line chart.
      teamPoints[season$AwayTeam[i]] = teamPoints[season$AwayTeam[i]] + 0
      
    } else if (season$home_team_goal[i] == season$away_team_goal[i]){ # In case of a draw, we add 1 point to each team
      df[df$Teams == season$HomeTeam[i],]$Points = df[df$Teams == season$HomeTeam[i],]$Points+1 
      df[df$Teams == season$AwayTeam[i],]$Points = df[df$Teams == season$AwayTeam[i],]$Points+1
      
      teamPoints[season$HomeTeam[i]] = teamPoints[season$HomeTeam[i]] + 1 
      teamPoints[season$AwayTeam[i]] = teamPoints[season$AwayTeam[i]] + 1
      
    } else { # Now the away team gets 3 poi
      df[df$Teams == season$AwayTeam[i],]$Points = df[df$Teams == season$AwayTeam[i],]$Points+3
      
      teamPoints[season$HomeTeam[i]] = teamPoints[season$HomeTeam[i]] + 0
      teamPoints[season$AwayTeam[i]] = teamPoints[season$AwayTeam[i]] + 3
      
    }
    #Updating the goals for the team to the dictionary which we will later store in the seasons table.
    teamGoalsfor[season$HomeTeam[i]] = teamGoalsfor[season$HomeTeam[i]] + season$home_team_goal[i] 
    teamGoalsfor[season$AwayTeam[i]] = teamGoalsfor[season$AwayTeam[i]] + season$away_team_goal[i]
    #Updating the goals against the team to the dictionary which we will later store in the seasons table.
    teamGoalsagainst[season$HomeTeam[i]] = teamGoalsagainst[season$HomeTeam[i]] + season$away_team_goal[i]
    teamGoalsagainst[season$AwayTeam[i]] = teamGoalsagainst[season$AwayTeam[i]] + season$home_team_goal[i]
    
    # Storing how many goals the Home and away teams scored so far in the season in the df dataframe.
    df[df$Teams == season$HomeTeam[i],]$GF = df[df$Teams == season$HomeTeam[i],]$GF+season$home_team_goal[i]
    df[df$Teams == season$AwayTeam[i],]$GF = df[df$Teams == season$AwayTeam[i],]$GF+season$away_team_goal[i]
    # Storing how many goals the Home and away teams conceded so far in the season in the df dataframe
    df[df$Teams == season$HomeTeam[i],]$GA = df[df$Teams == season$HomeTeam[i],]$GA+season$away_team_goal[i]
    df[df$Teams == season$AwayTeam[i],]$GA = df[df$Teams == season$AwayTeam[i],]$GA+season$home_team_goal[i]
    # Storing goal difference for each of the team df dataframe
    df$GD = df$GF - df$GA
    
    # Creating a Rank column in "df" that will give ranks based on the the points each team scored 
    order.Points <- order(df$Points,  decreasing = TRUE)
    df$Rank[order.Points] <- 1:nrow(df)
    # Adding all the information in the "seasons" dataframe from the dictionaries.
    season$HomePoints[i] = as.integer(teamPoints[season$HomeTeam[i]])
    season$AwayPoints[i] = as.integer(teamPoints[season$AwayTeam[i]])
    season$HomePosition[i] = df[df$Teams == season$HomeTeam[i],]$Rank
    season$AwayPosition[i] = df[df$Teams == season$AwayTeam[i],]$Rank
    season$HomeGoalsFor[i] = as.integer(teamGoalsfor[season$HomeTeam[i]])
    season$AwayGoalsFor[i] = as.integer(teamGoalsfor[season$AwayTeam[i]])
    season$HomeGoalsAgainst[i] = as.integer(teamGoalsagainst[season$HomeTeam[i]])
    season$AwayGoalsAgainst[i] = as.integer(teamGoalsagainst[season$AwayTeam[i]])
  }
  df <- df[order(df$Points, decreasing = TRUE),] #Ordering the "df" table based on who has the most points.
  rownames(df) <- NULL # setting the row names for both dataframes this function gives to NULL.
  rownames(season) <- NULL
  {return(list(SeasonTable=df, SeasonResult=season))} # Assigning a proper name to both dataframes and returning those.
}
# Building the 2010/2011 LaLiga season once and reusing both returned tables.
# seasonalData() walks through every match of the season, so calling it a
# second time just to pick the other list element repeats all of that work.
laLiga201011 <- seasonalData("2010/2011", "Spain LIGA BBVA")

# League table for the season (one row per team).
SeasonTable <- laLiga201011$SeasonTable
SeasonTable
##    Rank                     Teams Played  GF GA  GD Points
## 1     1              FC Barcelona     38  95 21  74     96
## 2     2            Real Madrid CF     38 102 33  69     92
## 3     3               Valencia CF     38  64 44  20     71
## 4     4             Villarreal CF     38  54 44  10     62
## 5     5           Atlético Madrid     38  62 53   9     58
## 6     6   Athletic Club de Bilbao     38  59 55   4     58
## 7     7                Sevilla FC     38  62 61   1     58
## 8     8              RCD Espanyol     38  46 55  -9     49
## 9     9                CA Osasuna     38  45 46  -1     47
## 10   10    Real Sporting de Gijón     38  35 42  -7     47
## 11   11                 Málaga CF     38  54 68 -14     46
## 12   12          Racing Santander     38  41 56 -15     46
## 13   13                Levante UD     38  41 52 -11     45
## 14   14             Real Sociedad     38  49 66 -17     45
## 15   15             Real Zaragoza     38  40 53 -13     45
## 16   16              RCD Mallorca     38  41 56 -15     44
## 17   17                 Getafe CF     38  49 60 -11     44
## 18   18 RC Deportivo de La Coruña     38  31 47 -16     43
## 19   19   Hércules Club de Fútbol     38  36 60 -24     35
## 20   20                UD Almería     38  36 70 -34     30
# Match-by-match results for the same season, including the running points,
# positions and goal totals that seasonalData() adds to every match row.
SeasonResult <- laLiga201011$SeasonResult
# Bar chart of how many matches ended with each full-time result, with the most
# frequent result first (fct_reorder() on -n sorts the bars in descending order).
SeasonResult %>% count(FullTimeResult) %>% ggplot(aes(x = fct_reorder(FullTimeResult, -n),
                                           y = n))+
  geom_col(fill = "#3D195B", color = "black")+
  labs(title = "Match Result for the Season",
       x = "Result",
       y = "Number of Wins")+
  theme_bw()

# Number of games each referee officiated in that season and league.

refCount <- SeasonResult %>% group_by(Ref) %>% 
  summarize(Games = n())

# Red cards each referee gave that season. The home and away columns are passed
# to sum() separately: adding them first would turn a match with a missing count
# on one side into NA, and na.rm would then discard the other side's cards too.

refRed <- SeasonResult %>% group_by(Ref) %>% 
  summarize(Red = sum(red_card_home_team, red_card_away_team, na.rm = TRUE))

# Attaching the number of games to the red card counts.
refRed <- refRed %>% left_join(refCount, by = "Ref")

# Yellow cards each referee gave that season (same NA handling as for red cards).
refYellow <- SeasonResult %>% group_by(Ref) %>%
  summarize(Yellow = sum(yellow_card_home_team, yellow_card_away_team, na.rm = TRUE))

# Merging the red card / games table with the yellow card table.
refRedYellow <- refRed %>% left_join(refYellow, by = "Ref")

refRedYellow
## # A tibble: 19 × 4
##    Ref                   Red Games Yellow
##    <chr>               <int> <int>  <int>
##  1 Álvarez Izquierdo       5    21    131
##  2 Ayza Gámez              4    19     84
##  3 Clos Gómez              3    20    115
##  4 Delgado Ferreiro        1    18     85
##  5 Estrada Fernández       2    18    100
##  6 Fernández Borbalán      6    18    105
##  7 González González       2    19     99
##  8 Iglesias Villanueva     1    19    101
##  9 Iturralde González      4    17     98
## 10 Mateu Lahoz             3    20     69
## 11 Muñiz Fernández         6    24    154
## 12 Paradas Romero          2    17     87
## 13 Pérez Lasa              3    18     94
## 14 Ramírez Domínguez       3    20    111
## 15 Rubinos Pérez           5    15     67
## 16 Teixeira Vitienes       4    38    170
## 17 Turienzo Álvarez        1    22    125
## 18 Undiano Mallenco        0    21    120
## 19 Velasco Carballo        3    16     98
# Horizontal bar charts of the referee table: bar length is the number of games
# officiated and the fill shade encodes the number of cards shown.

# Red cards.
ggplot(refRedYellow) +
  geom_col(
    aes(x = reorder(Ref, Games), y = Games, fill = Red),
    color = "black"
  ) +
  scale_fill_gradient(low = "white", high = "darkred") +
  scale_y_continuous(breaks = seq(0, 38, 2)) +
  coord_flip() +
  labs(
    title = "Games the referees officiated and number of Red Cards",
    x = "Referee",
    y = "Number of Games Officiated"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Yellow cards.
ggplot(refRedYellow) +
  geom_col(
    aes(x = reorder(Ref, Games), y = Games, fill = Yellow),
    color = "black"
  ) +
  scale_fill_gradient(low = "white", high = "yellow") +
  scale_y_continuous(breaks = seq(0, 38, 2)) +
  coord_flip() +
  labs(
    title = "Games the referees officiated and number of Yellow Cards",
    x = "Referee",
    y = "Number of Games Officiated"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Two interactive box plots of ball possession per team for the season: one for
# home matches and one for away matches. Teams are ordered by reorder()'s
# default summary (the mean) of their possession, and coord_flip() puts the team
# names on the vertical axis.
# The earlier theme(axis.text.x = ...) call was dropped: it was added before
# theme_bw(), which is a complete theme and replaces everything set before it,
# so the rotation never reached the rendered plot.

homePossession <- SeasonResult %>% 
  ggplot()+
  geom_boxplot(aes(x = reorder(HomeTeam, possession_home_team), fill = HomeTeam, y = possession_home_team))+
  labs(title = "Relationship between Home Teams and their Possession",
       x = "Home Teams",
       y = "Possession")+
  coord_flip()+
  theme_bw()+
  theme(legend.position="none") # the fill colour only repeats the axis label, so no legend

ggplotly(homePossession)
awayPossession <- SeasonResult %>% 
  ggplot()+
  geom_boxplot(aes(x = reorder(AwayTeam, possession_away_team), fill = AwayTeam, y = possession_away_team))+
  labs(title = "Relationship between Away Teams and their Possession",
       x = "Away Teams",
       y = "Possession")+
  coord_flip()+
  theme_bw()+
  theme(legend.position="none")

ggplotly(awayPossession)
# Tree map in which each team's rectangle is sized by the points it won.
# NOTE(review): no `type` is passed, and treemap's default colouring presumably
# follows the index variable, so `vColor = "GF"` may have no visible effect here;
# confirm against the treemap documentation.
treemap(
  SeasonTable,
  index = "Teams",
  vSize = "Points",
  vColor = "GF",
  palette = "Set3",
  fontsize.labels = 8,
  fontcolor.labels = "black",
  border.col = "white"
)

# Goals each team scored at home and away. The matches are summarised once per
# venue and the two summaries are then joined on the team name, giving one row
# per team with its home and away goal totals.

seasonHomeGoals <- SeasonResult %>%
  group_by(HomeTeam) %>%
  summarize(GoalsHome = sum(home_team_goal, na.rm = TRUE))

seasonAwayGoals <- SeasonResult %>%
  group_by(AwayTeam) %>%
  summarize(GoalsAway = sum(away_team_goal, na.rm = TRUE))

# The join keeps the left-hand key name (HomeTeam); it is renamed to Team because
# the row now describes the team regardless of venue.
seasonGoals <- seasonHomeGoals %>%
  left_join(seasonAwayGoals, by = c("HomeTeam" = "AwayTeam")) %>%
  rename(Team = HomeTeam)
seasonGoals
## # A tibble: 20 × 3
##    Team                      GoalsHome GoalsAway
##    <chr>                         <int>     <int>
##  1 Athletic Club de Bilbao          32        27
##  2 Atlético Madrid                  35        27
##  3 CA Osasuna                       28        17
##  4 FC Barcelona                     46        49
##  5 Getafe CF                        33        16
##  6 Hércules Club de Fútbol          27         9
##  7 Levante UD                       25        16
##  8 Málaga CF                        29        25
##  9 Racing Santander                 25        16
## 10 RC Deportivo de La Coruña        22         9
## 11 RCD Espanyol                     33        13
## 12 RCD Mallorca                     25        16
## 13 Real Madrid CF                   61        41
## 14 Real Sociedad                    27        22
## 15 Real Sporting de Gijón           23        12
## 16 Real Zaragoza                    26        14
## 17 Sevilla FC                       35        27
## 18 UD Almería                       23        13
## 19 Valencia CF                      34        30
## 20 Villarreal CF                    33        21
# seasonGoals is in wide form (one column per venue). A stacked bar chart needs
# one row per team and venue, so the two goal columns are gathered into a
# "Goals" label column and a "Count" value column.

seasonGoalsL <- pivot_longer(
  seasonGoals,
  cols = GoalsHome:GoalsAway,
  names_to = "Goals",
  values_to = "Count"
)
seasonGoalsL
## # A tibble: 40 × 3
##    Team                    Goals     Count
##    <chr>                   <chr>     <int>
##  1 Athletic Club de Bilbao GoalsHome    32
##  2 Athletic Club de Bilbao GoalsAway    27
##  3 Atlético Madrid         GoalsHome    35
##  4 Atlético Madrid         GoalsAway    27
##  5 CA Osasuna              GoalsHome    28
##  6 CA Osasuna              GoalsAway    17
##  7 FC Barcelona            GoalsHome    46
##  8 FC Barcelona            GoalsAway    49
##  9 Getafe CF               GoalsHome    33
## 10 Getafe CF               GoalsAway    16
## # … with 30 more rows
# Stacked horizontal bar chart of seasonGoalsL: one bar per team, split into
# home and away goals.

ggplot(seasonGoalsL) +
  geom_col(
    aes(x = reorder(Team, Count), y = Count, fill = Goals),
    position = "stack"
  ) +
  scale_fill_viridis(discrete = TRUE) +
  scale_y_continuous(breaks = seq(0, 150, 10)) +
  coord_flip() +
  labs(
    title = "Home Goals and Away Goals for Teams in the League for given Season",
    x = "Teams",
    y = "Number of Goals"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", text = element_text(size = 10))

# Builds a match-by-match table for the given teams.
#
# Arguments:
#   names   - character vector of team names, as they appear in the HomeTeam /
#             AwayTeam columns of `results`.
#   results - match-level data frame to read from. Defaults to the global
#             SeasonResult, so existing calls with just `names` keep working.
#
# Returns a data frame with one row per match each requested team played, in the
# order the teams were given and, within a team, in the row order of `results`.
# Columns: Team, Points, Stage, TotalGoalsFor, TotalGoalsAgainst, Shots (on
# target + off target for that team in the match) and Goal (goals that team
# scored in the match). Every value is taken from the home-side or away-side
# column of `results`, depending on which side the team played on. A team with
# no matches in `results` contributes no rows instead of raising an error.
stagePointsGoals <- function(names, results = SeasonResult) {
  requiredColumns <- c(
    "HomeTeam", "AwayTeam", "stage",
    "HomePoints", "AwayPoints",
    "HomeGoalsFor", "AwayGoalsFor",
    "HomeGoalsAgainst", "AwayGoalsAgainst",
    "on_target_shot_home_team", "off_target_shot_home_team",
    "on_target_shot_away_team", "off_target_shot_away_team",
    "home_team_goal", "away_team_goal"
  )
  missingColumns <- setdiff(requiredColumns, colnames(results))
  if (length(missingColumns) > 0) {
    stop(
      "`results` is missing column(s): ",
      paste(missingColumns, collapse = ", "),
      call. = FALSE
    )
  }

  # Shots per side do not depend on the team being processed, so compute once.
  homeShots <- results$on_target_shot_home_team + results$off_target_shot_home_team
  awayShots <- results$on_target_shot_away_team + results$off_target_shot_away_team

  # All matches of a single team, as a data frame in the output layout.
  teamRows <- function(name) {
    # %in% never yields NA, so rows with a missing team name are simply skipped.
    isHome <- results$HomeTeam %in% name
    isAway <- results$AwayTeam %in% name
    played <- which(isHome | isAway)
    atHome <- isHome[played]

    # Picks, for every match played, the home-side value when the team was at
    # home and the away-side value otherwise (keeps the column's type).
    ownSide <- function(homeValues, awayValues) {
      values <- awayValues[played]
      values[atHome] <- homeValues[played][atHome]
      values
    }

    data.frame(
      Team = rep(name, length(played)),
      Points = ownSide(results$HomePoints, results$AwayPoints),
      Stage = results$stage[played],
      TotalGoalsFor = ownSide(results$HomeGoalsFor, results$AwayGoalsFor),
      TotalGoalsAgainst = ownSide(results$HomeGoalsAgainst, results$AwayGoalsAgainst),
      Shots = ownSide(homeShots, awayShots),
      Goal = ownSide(results$home_team_goal, results$away_team_goal),
      stringsAsFactors = FALSE
    )
  }

  if (length(names) == 0) {
    # No teams requested: return an empty table that still has all the columns.
    return(teamRows(character(0)))
  }

  # One data frame per team, bound together once instead of growing a data
  # frame cell by cell inside a loop.
  out <- do.call(rbind, lapply(names, teamRows))
  rownames(out) <- NULL
  out
}
# Match-by-match statistics for the teams passed to stagePointsGoals().

selected<-stagePointsGoals(c("FC Barcelona", "Real Madrid CF", "Sevilla FC", "RCD Espanyol", "Real Sporting de Gijón"))
# Line chart of the running points total per stage, one line per selected team.
# stat_summary() is given an explicit `fun`: without one it prints
# "No summary function supplied" and falls back to mean_se(). The drawn line is
# the same, since the line geom only uses the mean.
# NOTE(review): presumably every team has exactly one row per stage, in which
# case the mean is just that row's value; verify against the data.

pointLines <- 
  selected %>%
  ggplot()+
  stat_summary(aes(x = Stage,
                   y = Points,
                   group = Team, color = Team),
               fun = mean,
               geom = "line", linewidth = 0.5) +
  labs(title = "Total Points per Stage throughout the season",
       x = "Stage",
       y = "Points")+
  theme_light()+
  scale_y_continuous(breaks = seq(0,100,10))+
  scale_x_continuous(breaks = seq(0,38,2))+
  theme(legend.position = "bottom")

ggplotly(pointLines)
## No summary function supplied, defaulting to `mean_se()`
# Line chart of the running goals-scored total per stage, one line per selected
# team. `fun = mean` is stated explicitly so stat_summary() no longer prints
# "No summary function supplied" and falls back to mean_se(); the drawn line is
# the same, since the line geom only uses the mean.
goalLines <- selected %>% ggplot() +
  stat_summary(aes(x = Stage,
                   y = TotalGoalsFor,
                   group = Team, color = Team),
               fun = mean,
               geom = "line", linewidth = 0.5) +
  labs(title = "Total Goals per Stage throughout the season",
       x = "Stage",
       y = "Goals")+
  theme_light()+
  scale_y_continuous(breaks = seq(0,100,10))+
  scale_x_continuous(breaks = seq(0,38,2))+
  theme(legend.position = "bottom")

ggplotly(goalLines)
## No summary function supplied, defaulting to `mean_se()`
# Stacked area chart of the running goals-scored total per stage: each band is
# one selected team and the top edge is the combined total of those teams.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for lines.
# theme_ipsum() was dropped because theme_bw() directly after it is a complete
# theme and replaced it entirely.

ggplot(selected, aes(x=Stage, y=TotalGoalsFor, fill=Team))+
  geom_area(alpha=0.8 , linewidth=.5, colour="white")+
  labs(title = "Total Goals Scored per Stage throughout the season",
       x = "Stage",
       y = "Total Goals For")+
  scale_fill_viridis(discrete = TRUE)+
  theme_bw()+
  scale_y_continuous(breaks = seq(0,500,40))+
  scale_x_continuous(breaks = seq(0,38,2)) +
  theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

# Stacked area chart of the running goals-conceded total per stage: each band is
# one selected team and the top edge is the combined total of those teams.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for lines.
# theme_ipsum() was dropped because theme_bw() directly after it is a complete
# theme and replaced it entirely.

ggplot(selected, aes(x=Stage, y=TotalGoalsAgainst, fill=Team))+
  geom_area(alpha=0.8 , linewidth=.5, colour="white")+
  labs(title = "Total Goals Conceded per Stage throughout the season",
       x = "Stage",
       y = "Total Goals Against")+
  scale_fill_viridis(discrete = TRUE)+
  theme_bw()+
  scale_y_continuous(breaks = seq(0,500,40))+
  scale_x_continuous(breaks = seq(0,38,2)) +
  theme(legend.position = "bottom")